{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn import preprocessing\n",
    "from PIL import Image\n",
    "import os\n",
    "\n",
    "def PicManage(path,i):\n",
    "    pic = Image.open(path)\n",
    "    pic.c_x, pic.c_y = (int(i/2) for i in pic.size)\n",
    "    box = (pic.c_x-50, pic.c_y-50, pic.c_x+50, pic.c_y+50)\n",
    "    #从图片中提取中心100*100的子矩形\n",
    "    region = pic.crop(box)\n",
    "    \n",
    "    #切分RGB\n",
    "    r, g, b = np.split(np.array(region), 3, axis = 2)\n",
    "    \n",
    "    #计算一阶矩\n",
    "    r_m1 = np.mean(r)\n",
    "    g_m1 = np.mean(g)\n",
    "    b_m1 = np.mean(b)\n",
    "    \n",
    "    #二阶矩\n",
    "    r_m2 = np.std(r)\n",
    "    g_m2 = np.std(g)\n",
    "    b_m2 = np.std(b)\n",
    "    \n",
    "    #三阶矩\n",
    "    r_m3 = np.mean(abs(r - r.mean())**3)**(1/3)\n",
    "    g_m3 = np.mean(abs(g - g.mean())**3)**(1/3)\n",
    "    b_m3 = np.mean(abs(b - b.mean())**3)**(1/3)\n",
    "    \n",
    "    #将数据标准化，区间在[-1,1]\n",
    "    typ = np.array([i])\n",
    "    arr = np.array([r_m1,g_m1,b_m1,r_m2,g_m2,b_m2,r_m3,g_m3,b_m3])\n",
    "    #df = pd.DataFrame(preprocessing.minmax_scale(arr,feature_range=(-1,1))).T\n",
    "    df = pd.DataFrame(arr).T\n",
    "    dn = pd.DataFrame(typ).T\n",
    "    return df,dn\n",
    "\n",
    "result = []\n",
    "type_result = []\n",
    "for i in os.listdir('./data/images'):\n",
    "    if i.endswith('.jpg'):\n",
    "        df,dn = PicManage('./data/images/'+i,int(i[0]))\n",
    "        result.append(df)\n",
    "        type_result.append(dn)\n",
    "        \n",
    "data = pd.concat(result)\n",
    "typ = pd.concat(type_result)\n",
    "data = pd.DataFrame(preprocessing.normalize(data,norm='l2'))   \n",
    "data['type'] = typ.values\n",
    "data.to_excel('./data/picData.xls',index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(203, 10)\n"
     ]
    }
   ],
   "source": [
    "print(data.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "train accuracy: 1.000000\n",
      "test accuracy: 0.975610\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "E:\\pythonenv\\datamining\\lib\\site-packages\\sklearn\\svm\\base.py:196: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    }
   ],
   "source": [
    "#-*- coding:utf-8 -*-\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "#datapath = './data/moment.csv'\n",
    "#data = pd.read_csv(datapath,encoding = 'gbk')\n",
    "data = data.values\n",
    "\n",
    "#划分训练集和测试集\n",
    "#cross_validation在sklearn0.20中改为model_selection\n",
    "from sklearn.model_selection  import train_test_split\n",
    "train, test, train_target, test_target = train_test_split(data[:,0:],data[:,-1],test_size=0.2)\n",
    "train_target = train_target.astype(int)\n",
    "test_target = test_target.astype(int)\n",
    "\n",
    "#构建SVM模型\n",
    "from sklearn import svm\n",
    "model = svm.SVC()\n",
    "model.fit(train*30,train_target)\n",
    "\n",
    "#save model\n",
    "from sklearn.externals import joblib\n",
    "joblib.dump(model,'svcmodel.pkl')\n",
    "\n",
    "#read model\n",
    "#model = joblib.load('svcmodel.pkl')\n",
    "\n",
    "#混淆矩阵\n",
    "from sklearn import metrics\n",
    "cm_train = metrics.confusion_matrix(train_target, model.predict(train*30))\n",
    "cm_test = metrics.confusion_matrix(test_target, model.predict(test*30))\n",
    "\n",
    "train_accuracy = metrics.accuracy_score(train_target,model.predict(train*30))\n",
    "test_accuracy = metrics.accuracy_score(test_target,model.predict(test*30))\n",
    "\n",
    "print(\"train accuracy: %f\"% train_accuracy) #0.94444\n",
    "print(\"test accuracy: %f\"% test_accuracy) #1.0000\n",
    "\n",
    "tr = pd.DataFrame(cm_train,index = range(1,6),columns = range(1,6)).to_excel('./data/train.xls')\n",
    "te = pd.DataFrame(cm_test,index = range(1,6),columns = range(1,6)).to_excel('./data/test.xls')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "datamining",
   "language": "python",
   "name": "datamining"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
